Dependency Grammar in NLTK

We load the `DependencyGrammar` class from the `nltk.grammar` module:



In [1]:

    
from nltk.grammar import DependencyGrammar

From `nltk.parse` we load the `DependencyGraph` class together with two dependency parsers, one projective and one non-projective:



In [2]:

    
from nltk.parse import (
    DependencyGraph,
    ProjectiveDependencyParser,
    NonprojectiveDependencyParser,
)



In [3]:

    
# Sample sentence, one token per line, in four whitespace-separated columns:
# word, POS tag, head index, relation label. Head indices are 1-based token
# positions within the sentence; the token whose head is 0 ("will") is the root.
treebank_data = """Pierre  NNP     2       NMOD
Vinken  NNP     8       SUB
,       ,       2       P
61      CD      5       NMOD
years   NNS     6       AMOD
old     JJ      2       NMOD
,       ,       2       P
will    MD      0       ROOT
join    VB      8       VC
the     DT      11      NMOD
board   NN      9       OBJ
as      IN      9       VMOD
a       DT      15      NMOD
nonexecutive    JJ      15      NMOD
director        NN      12      PMOD
Nov.    NNP     9       VMOD
29      CD      16      NMOD
.       .       9       VMOD
"""



In [4]:

    
# Parse the four-column sample text into a DependencyGraph.
dg = DependencyGraph(treebank_data)



In [5]:

    
# Convert the graph to a nested tree and pretty-print it in bracketed form.
dg.tree().pprint()









    



(will
  (Vinken Pierre , (old (years 61)) ,)
  (join (board the) (as (director a nonexecutive)) (Nov. 29) .))



In [6]:

    
# Each triple is (head, relation, dependent); head and dependent are indexed
# with [0] and [1] by the template, giving the two fields shown per token.
triple_template = '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
for governor, relation, dependent in dg.triples():
    print(triple_template.format(h=governor, r=relation, d=dependent))









    



(will, MD), SUB, (Vinken, NNP)
(Vinken, NNP), NMOD, (Pierre, NNP)
(Vinken, NNP), P, (,, ,)
(Vinken, NNP), NMOD, (old, JJ)
(old, JJ), AMOD, (years, NNS)
(years, NNS), NMOD, (61, CD)
(Vinken, NNP), P, (,, ,)
(will, MD), VC, (join, VB)
(join, VB), OBJ, (board, NN)
(board, NN), NMOD, (the, DT)
(join, VB), VMOD, (as, IN)
(as, IN), PMOD, (director, NN)
(director, NN), NMOD, (a, DT)
(director, NN), NMOD, (nonexecutive, JJ)
(join, VB), VMOD, (Nov., NNP)
(Nov., NNP), NMOD, (29, CD)
(join, VB), VMOD, (., .)

Dependency Version of the Penn Treebank



In [7]:

    
from nltk.corpus import dependency_treebank



In [8]:

    
# Take the first parsed sentence from the dependency treebank corpus reader.
t = dependency_treebank.parsed_sents()[0]



In [9]:

    
# The argument 3 selects the three-column layout: word, tag, head index.
print(t.to_conll(3))









    



Pierre	NNP	2
Vinken	NNP	8
,	,	2
61	CD	5
years	NNS	6
old	JJ	2
,	,	2
will	MD	0
join	VB	8
the	DT	11
board	NN	9
as	IN	9
a	DT	15
nonexecutive	JJ	15
director	NN	12
Nov.	NNP	9
29	CD	16
.	.	8

Using the output of zpar (like Malt-TAB but with zero-based indexing):



In [10]:

    
# The same tokens in zpar-style columns (word, POS tag, head index, relation).
# Here head indices are 0-based, and the root token ("will") has head -1.
zpar_data = """
Pierre  NNP     1       NMOD
Vinken  NNP     7       SUB
,       ,       1       P
61      CD      4       NMOD
years   NNS     5       AMOD
old     JJ      1       NMOD
,       ,       1       P
will    MD      -1      ROOT
join    VB      7       VC
the     DT      10      NMOD
board   NN      8       OBJ
as      IN      8       VMOD
a       DT      14      NMOD
nonexecutive    JJ      14      NMOD
director        NN      11      PMOD
Nov.    NNP     8       VMOD
29      CD      15      NMOD
.       .       7       P
"""



In [11]:

    
# zero_based=True is required because zpar_data counts head positions from 0.
zdg = DependencyGraph(zpar_data, zero_based=True)



In [12]:

    
# Show the zpar-derived graph as a bracketed tree.
print(zdg.tree())









    



(will
  (Vinken Pierre , (old (years 61)) ,)
  (join (board the) (as (director a nonexecutive)) (Nov. 29))
  .)

Projective Dependency Parsing



In [13]:

    
# Toy grammar for the projective example. Each rule is 'head' -> dependents;
# '|' separates alternatives, which become separate productions when printed.
grammar = DependencyGrammar.fromstring("""
'fell' -> 'price' | 'stock'
'price' -> 'of' 'the'
'of' -> 'stock'
'stock' -> 'the'
""")



In [14]:

    
# List the productions the grammar string was expanded into.
print(grammar)









    



Dependency grammar with 5 productions
  'fell' -> 'price'
  'fell' -> 'stock'
  'price' -> 'of' 'the'
  'of' -> 'stock'
  'stock' -> 'the'



In [15]:

    
# Build a projective dependency parser driven by the toy grammar.
dp = ProjectiveDependencyParser(grammar)



In [16]:

    
# Print every parse the parser returns for the sentence, in sorted order.
# The loop variable is `tree` rather than `t` so that it does not overwrite
# the treebank sentence bound to `t` earlier in the notebook.
for tree in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
    print(tree)









    



(fell (price the (of (stock the))))
(fell (price the of) (stock the))
(fell (price the of the) stock)

Non-Projective Dependency Parsing



In [17]:

    
# Toy grammar for the non-projective example. Note: this rebinds `grammar`,
# replacing the projective grammar defined earlier in the notebook.
grammar = DependencyGrammar.fromstring("""
'taught' -> 'play' | 'man'
'man' -> 'the'
'play' -> 'golf' | 'dog' | 'to'
'dog' -> 'his'
""")



In [18]:

    
# List the productions of the non-projective toy grammar.
print(grammar)









    



Dependency grammar with 7 productions
  'taught' -> 'play'
  'taught' -> 'man'
  'man' -> 'the'
  'play' -> 'golf'
  'play' -> 'dog'
  'play' -> 'to'
  'dog' -> 'his'



In [19]:

    
# Note: this rebinds `dp`; from here on it is the non-projective parser.
dp = NonprojectiveDependencyParser(grammar)



In [20]:

    
# Single-element destructuring: binds the one parse to g, and raises
# ValueError if the parser yields zero parses or more than one.
[g] = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])



In [21]:

    
# The word stored on the graph's root node.
print(g.root['word'])









    



taught



In [22]:

    
# Dump the raw node table, keyed by address; address 0 is the TOP node, which
# has no word. The printed repr embeds a memory address, so the first line of
# the output changes from run to run.
print(g)









    



defaultdict(<function DependencyGraph.__init__.<locals>.<lambda> at 0x0000026AC1D89798>,
            {0: {'address': 0,
                 'ctag': 'TOP',
                 'deps': defaultdict(<class 'list'>, {'ROOT': [3]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': 'TOP',
                 'word': None},
             1: {'address': 1,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'the'},
             2: {'address': 2,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [1]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'man'},
             3: {'address': 3,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [2, 7]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'taught'},
             4: {'address': 4,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'his'},
             5: {'address': 5,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [4]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'dog'},
             6: {'address': 6,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'to'},
             7: {'address': 7,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [5, 6, 8]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'play'},
             8: {'address': 8,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'golf'}})



In [23]:

    
# Iterate over the parser's results directly and print each graph it yields.
for graph in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']):
    print(graph)









    



defaultdict(<function DependencyGraph.__init__.<locals>.<lambda> at 0x0000026AC1D89B88>,
            {0: {'address': 0,
                 'ctag': 'TOP',
                 'deps': defaultdict(<class 'list'>, {'ROOT': [3]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': 'TOP',
                 'word': None},
             1: {'address': 1,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'the'},
             2: {'address': 2,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [1]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'man'},
             3: {'address': 3,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [2, 7]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'taught'},
             4: {'address': 4,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'his'},
             5: {'address': 5,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [4]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'dog'},
             6: {'address': 6,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'to'},
             7: {'address': 7,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [5, 6, 8]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'play'},
             8: {'address': 8,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'golf'}})



In [24]:

    
# For every real token (nodes without a word are skipped), print its address,
# its word, and the addresses listed under the '' key of its 'deps' mapping.
for address in sorted(g.nodes):
    node = g.nodes[address]
    if node['word'] is None:
        continue
    print('{address} {word}: {d}'.format(d=node['deps'][''], **node))









    



1 the: []
2 man: [1]
3 taught: [2, 7]
4 his: []
5 dog: [4]
6 to: []
7 play: [5, 6, 8]
8 golf: []



In [25]:

    
# Bracketed tree view of the non-projective parse.
print(g.tree())









    



(taught (man the) (play (dog his) to golf))



In [ ]: